The purpose of this notebook is to annotate the cell types of the two new T cell culturing & activation experiments (replicate 2). It will follow the same steps as replicate 1, so we refer to the notebook “03-annotation.Rmd” for a full explanation of each step.
library(scater)
library(scran)
library(Seurat)
library(ggpubr)
library(kBET)
library(cluster)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(viridis)
library(tidyverse)
source("bin/utils.R")
t_act_l <- readRDS("results/R_objects/t_act_Seurat_list_filtered_normalized.rds")
Let us filter the previous list to retain only replicate 2:
t_act_l <- t_act_l[c("day_0_rep2", "day_1_rep2")]
t_act_0_rep2 <- SplitObject(t_act_l$day_0_rep2, split.by = "donor")
t_act_1_rep2 <- SplitObject(t_act_l$day_1_rep2, split.by = "donor")
t_act_l <- list(
t_act_0_rep2$Female2,
t_act_0_rep2$Female3,
t_act_1_rep2$Female2,
t_act_1_rep2$Female3
)
names(t_act_l) <- c("0_rep2_F2", "0_rep2_F3", "1_rep2_F2", "1_rep2_F3")
t_act_l <- purrr::map(t_act_l, FindVariableFeatures)
var_plots <- purrr::map(t_act_l, VariableFeaturePlot)
var_plots <- purrr::map2(t_act_l, var_plots, function(seurat, p) {
LabelPoints(
plot = p,
points = head(VariableFeatures(seurat), 10),
repel = TRUE
)
})
var_plots
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
Again, in the activated cells (day 1), we see that interferon gamma is the most variable gene.
t_act_l <- purrr::map(t_act_l, ScaleData)
t_act_l <- purrr::map(t_act_l, RunPCA)
purrr::map(t_act_l, VizDimLoadings, dims = 1:3, reduction = "pca")
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
purrr::map(t_act_l, DimPlot, reduction = "pca")
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
# Cluster cells
t_act_l <- purrr::map(t_act_l, FindNeighbors, dims = 1:20)
# resolutions <- c(
# 0.005,
# seq(0.01, 0.1, by = 0.01),
# seq(0.1, 1, by = 0.1), seq(1, 10, by = 1)
# )
# avgs_sil_width_dfs <- purrr::map2(t_act_l, names(t_act_l), function(seurat, condition) {
# print(condition)
# avgs_sil_width_df <- data.frame(num_k = c(), sil_width = c(), resolution = c())
# num_k <- 1
# for (res in resolutions) {
# print(str_c("Current resolution is ", res))
# seurat <- FindClusters(seurat, resolution = res, verbose = FALSE)
# curr_num_k <- length(levels(seurat$seurat_clusters))
# if (curr_num_k == num_k) {
# next
# } else {
# sil_width <- calc_sil_width(
# object = seurat,
# clusters = seurat$seurat_clusters,
# npcs = 5,
# ncell = 2500
# )
# print(str_c("Current silhouette width is ", sil_width))
# curr_df <- data.frame(num_k = curr_num_k, sil_width = sil_width, resolution = res)
# avgs_sil_width_df <- rbind(avgs_sil_width_df, curr_df)
# num_k <- curr_num_k
# print(str_c("Current number of clusters is ", num_k))
# }
# }
# avgs_sil_width_df
# })
# resolution_plots <- purrr::map2(avgs_sil_width_dfs, names(avgs_sil_width_dfs), function(df, cond) {
# ggplot(df, aes(num_k, sil_width, label = num_k)) +
# geom_text() +
# labs(title = cond,
# x = "Number of clusters (k)",
# y = "Average Silhouette Width") +
# theme_classic() +
# theme(plot.title = element_text(hjust = 0.5))
# })
# resolution_plots
We aim to determine a number of clusters that maximizes silhouette width while preventing overstratification. Judgding by the previous plots, we define the following k:
# optimal_k <- c(5, 4, 5, 5)
# t_act_l <- purrr::pmap(list(t_act_l, avgs_sil_width_dfs, optimal_k), function(seurat, df, opt_k) {
# opt_resolution <- df[df$num_k == opt_k, "resolution"]
# seurat <- FindClusters(object = seurat, resolution = opt_resolution)
# seurat
# })
# purrr::map(t_act_l, ~ length(levels(.x$seurat_clusters)))
resolutions <- c(0.1, 0.1, 0.3, 0.15)
t_act_l <- purrr::map2(t_act_l, resolutions, function(seurat, res) {
seurat <- FindClusters(seurat, resolution = res)
seurat
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4753
## Number of edges: 195957
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9702
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4153
## Number of edges: 168564
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9654
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3963
## Number of edges: 146453
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8835
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4951
## Number of edges: 181035
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9257
## Number of communities: 5
## Elapsed time: 0 seconds
t_act_l <- purrr::map(t_act_l, function(seurat) {
seurat@reductions$PCA <- NULL
seurat@reductions$TSNE <- NULL
seurat@reductions$UMAP <- NULL
seurat
})
t_act_l <- purrr::map(t_act_l, RunTSNE, reduction = "pca", dims = 1:20)
t_act_l <- purrr::map(t_act_l, RunUMAP, reduction = "pca", dims = 1:20)
purrr::map(t_act_l, DimPlot, reduction = "tsne")
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
purrr::map(t_act_l, DimPlot, reduction = "umap")
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
markers_l <- purrr::map(t_act_l, FindAllMarkers, only.pos = TRUE)
purrr::map(markers_l, DT::datatable)
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
DT::datatable(markers_l$`0_rep2_F2`)
DT::datatable(markers_l$`0_rep2_F3`)
DT::datatable(markers_l$`1_rep2_F2`)
DT::datatable(markers_l$`1_rep2_F3`)
# saveRDS(markers_l, "results/R_objects/t_act_markers_list_rep2.rds")
marker_selection <- purrr::map2(t_act_l, markers_l, function(seurat, df) {
num_k <- length(unique(seurat$seurat_clusters))
selection <- unlist(purrr::map(0:(num_k-1), ~df[df$cluster == .x, "gene"][0:8]))
selection
})
heatmaps_markers <- purrr::map2(t_act_l, marker_selection, ~DoHeatmap(.x, features = .y) + NoLegend())
heatmaps_markers
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
t_act_l <- purrr::map(t_act_l, function(seurat) {
s_genes <- cc.genes$s.genes[cc.genes$s.genes %in% rownames(seurat@assays$RNA@scale.data)]
g2m_genes <- cc.genes$g2m.genes[cc.genes$g2m.genes %in% rownames(seurat@assays$RNA@scale.data)]
seurat <- CellCycleScoring(object = seurat, s.features = s_genes, g2m.features = g2m_genes)
seurat
})
cycling_gg <- purrr::map(t_act_l, FeaturePlot, features = c("S.Score", "G2M.Score"), cols = viridis(20))
cycling_gg
## $`0_rep2_F2`
##
## $`0_rep2_F3`
##
## $`1_rep2_F2`
##
## $`1_rep2_F3`
| Cluster ID | Cell type |
|---|---|
| 0 | CD4 T-cell |
| 1 | NK |
| 2 | Monocyte |
| 3 | B-cell |
| 4 | CD8 T-cell |
| 5 | FCGR3A Monocyte |
| 6 | Unknown |
# CD4+ T cells show high expression of IL7R.
# CD8+ T cells show high expression of both NKG7 and CD3D.
# NK cells show high expression of both NKG7 and GNLY.
FeaturePlot(t_act_l$`0_rep2_F2`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))
t_act_l$`0_rep2_F2`$cell_type <- case_when(
t_act_l$`0_rep2_F2`$seurat_clusters == "0" ~ "CD4 T-cell",
t_act_l$`0_rep2_F2`$seurat_clusters == "1" ~ "NK",
t_act_l$`0_rep2_F2`$seurat_clusters == "2" ~ "Monocyte",
t_act_l$`0_rep2_F2`$seurat_clusters == "3" ~ "B-cell",
t_act_l$`0_rep2_F2`$seurat_clusters == "4" ~ "CD8 T-cell",
t_act_l$`0_rep2_F2`$seurat_clusters == "5" ~ "FCGR3A Monocyte",
t_act_l$`0_rep2_F2`$seurat_clusters == "6" ~ "Unknown"
)
Idents(t_act_l$`0_rep2_F2`) <- "cell_type"
DimPlot(t_act_l$`0_rep2_F2`, reduction = "umap", label = TRUE) + NoLegend()
| Cluster ID | Cell type |
|---|---|
| 0 | CD4 T-cell |
| 1 | Monocyte |
| 2 | CD8 T-cell |
| 3 | NK |
| 4 | B-cell |
| 5 | FCGR3A Monocyte |
FeaturePlot(t_act_l$`0_rep2_F3`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))
t_act_l$`0_rep2_F3`$cell_type <- case_when(
t_act_l$`0_rep2_F3`$seurat_clusters == "0" ~ "CD4 T-cell",
t_act_l$`0_rep2_F3`$seurat_clusters == "1" ~ "Monocyte",
t_act_l$`0_rep2_F3`$seurat_clusters == "2" ~ "CD8 T-cell",
t_act_l$`0_rep2_F3`$seurat_clusters == "3" ~ "NK",
t_act_l$`0_rep2_F3`$seurat_clusters == "4" ~ "B-cell",
t_act_l$`0_rep2_F3`$seurat_clusters == "5" ~ "FCGR3A Monocyte"
)
Idents(t_act_l$`0_rep2_F3`) <- "cell_type"
DimPlot(t_act_l$`0_rep2_F3`, reduction = "umap", label = TRUE) + NoLegend()
| Cluster ID | Cell type |
|---|---|
| 0 | Cycling CD4 T-cell |
| 1 | NK |
| 2 | Activated CD4 T-cell |
| 3 | B-cell |
| 4 | CD8 T-cell |
| 5 | Unknown |
FeaturePlot(t_act_l$`1_rep2_F2`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))
t_act_l$`1_rep2_F2`$cell_type <- case_when(
t_act_l$`1_rep2_F2`$seurat_clusters == "0" ~ "Cycling CD4 T-cell",
t_act_l$`1_rep2_F2`$seurat_clusters == "1" ~ "NK",
t_act_l$`1_rep2_F2`$seurat_clusters == "2" ~ "Activated CD4 T-cell",
t_act_l$`1_rep2_F2`$seurat_clusters == "3" ~ "B-cell",
t_act_l$`1_rep2_F2`$seurat_clusters == "4" ~ "CD8 T-cell",
t_act_l$`1_rep2_F2`$seurat_clusters == "5" ~ "Unknown"
)
Idents(t_act_l$`1_rep2_F2`) <- "cell_type"
DimPlot(t_act_l$`1_rep2_F2`, reduction = "umap", label = TRUE) + NoLegend()
| Cluster ID | Cell type |
|---|---|
| 0 | CD4 T-cell |
| 1 | NK |
| 2 | CD8 T-cell |
| 3 | B-cell |
| 4 | Unknown |
FeaturePlot(t_act_l$`1_rep2_F3`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))
t_act_l$`1_rep2_F3`$cell_type <- case_when(
t_act_l$`1_rep2_F3`$seurat_clusters == "0" ~ "CD4 T-cell",
t_act_l$`1_rep2_F3`$seurat_clusters == "1" ~ "NK",
t_act_l$`1_rep2_F3`$seurat_clusters == "2" ~ "CD8 T-cell",
t_act_l$`1_rep2_F3`$seurat_clusters == "3" ~ "B-cell",
t_act_l$`1_rep2_F3`$seurat_clusters == "4" ~ "Unknown"
)
Idents(t_act_l$`1_rep2_F3`) <- "cell_type"
# CD4 T-cell can be subdivided into cycling and activated (looking at the S score and previous donor)
seurat_0 <- t_act_l$`1_rep2_F3`
seurat_0 <- subset(seurat_0, idents = "CD4 T-cell")
seurat_0 <- pre_process_seurat(seurat_0)
seurat_0 <- FindNeighbors(seurat_0)
seurat_0 <- FindClusters(seurat_0, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3218
## Number of edges: 103122
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(seurat_0)
markers_0 <- FindAllMarkers(seurat_0, only.pos = TRUE)
FeaturePlot(seurat_0, features = c("S.Score", "POLR2H", "POLR3K"))
cells_0_0 <- colnames(seurat_0)[seurat_0$seurat_clusters == "0"]
cells_0_1 <- colnames(seurat_0)[seurat_0$seurat_clusters == "1"]
t_act_l$`1_rep2_F3`$cell_type[colnames(t_act_l$`1_rep2_F3`) %in% cells_0_0] <- "Cycling CD4 T-cell"
t_act_l$`1_rep2_F3`$cell_type[colnames(t_act_l$`1_rep2_F3`) %in% cells_0_1] <- "Activated CD4 T-cell"
# Plot
DimPlot(t_act_l$`1_rep2_F3`, reduction = "umap", label = TRUE) + NoLegend()
# saveRDS(t_act_l, file = "results/R_objects/t_act_Seurat_list_reannotated_rep2.rds")
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.0 stringr_1.4.0 dplyr_0.8.4 purrr_0.3.3 readr_1.3.1 tidyr_1.0.2 tibble_2.1.3 tidyverse_1.3.0 viridis_0.5.1 viridisLite_0.3.0 org.Hs.eg.db_3.10.0 AnnotationDbi_1.48.0 cluster_2.1.0 kBET_0.99.6 ggpubr_0.2.5 magrittr_1.5 Seurat_3.1.4 scran_1.14.6 scater_1.14.6 ggplot2_3.3.0 SingleCellExperiment_1.8.0 SummarizedExperiment_1.16.1 DelayedArray_0.12.2 BiocParallel_1.20.1 matrixStats_0.55.0 Biobase_2.46.0 GenomicRanges_1.38.0 GenomeInfoDb_1.22.0 IRanges_2.20.2 S4Vectors_0.24.3 BiocGenerics_0.32.0 BiocStyle_2.14.4
##
## loaded via a namespace (and not attached):
## [1] reticulate_1.14 tidyselect_1.0.0 RSQLite_2.2.0 htmlwidgets_1.5.1 grid_3.6.1 Rtsne_0.15 munsell_0.5.0 codetools_0.2-16 mutoss_0.1-12 ica_1.0-2 DT_0.12 statmod_1.4.34 future_1.16.0 withr_2.1.2 colorspace_1.4-1 knitr_1.28 rstudioapi_0.11 ROCR_1.0-7 ggsignif_0.6.0 gbRd_0.4-11 listenv_0.8.0 labeling_0.3 Rdpack_0.11-1 GenomeInfoDbData_1.2.2 mnormt_1.5-6 farver_2.0.3 bit64_0.9-7 vctrs_0.2.3 generics_0.0.2 TH.data_1.0-10 xfun_0.12 R6_2.4.1 ggbeeswarm_0.6.0 rsvd_1.0.3 locfit_1.5-9.1 bitops_1.0-6 assertthat_0.2.1 promises_1.1.0 scales_1.1.0 multcomp_1.4-12 beeswarm_0.2.3 gtable_0.3.0 npsurv_0.4-0 globals_0.12.5 sandwich_2.5-1 rlang_0.4.5 splines_3.6.1
## [48] lazyeval_0.2.2 broom_0.5.5 BiocManager_1.30.10 yaml_2.2.1 reshape2_1.4.3 modelr_0.1.6 crosstalk_1.0.0 backports_1.1.5 httpuv_1.5.2 tools_3.6.1 bookdown_0.18 gplots_3.0.3 RColorBrewer_1.1-2 ggridges_0.5.2 TFisher_0.2.0 Rcpp_1.0.3 plyr_1.8.6 zlibbioc_1.32.0 RCurl_1.98-1.1 pbapply_1.4-2 cowplot_1.0.0 zoo_1.8-7 haven_2.2.0 ggrepel_0.8.1 fs_1.3.2 RSpectra_0.16-0 magick_2.3 data.table_1.12.8 lmtest_0.9-37 reprex_0.3.0 RANN_2.6.1 mvtnorm_1.1-0 fitdistrplus_1.0-14 xtable_1.8-4 mime_0.9 hms_0.5.3 patchwork_1.0.0 lsei_1.2-0 evaluate_0.14 readxl_1.3.1 gridExtra_2.3 compiler_3.6.1 KernSmooth_2.23-16 crayon_1.3.4 htmltools_0.4.0 later_1.0.0 RcppParallel_4.4.4
## [95] lubridate_1.7.4 DBI_1.1.0 dbplyr_1.4.2 MASS_7.3-51.5 Matrix_1.2-18 cli_2.0.2 gdata_2.18.0 metap_1.3 igraph_1.2.4.2 pkgconfig_2.0.3 sn_1.5-5 numDeriv_2016.8-1.1 plotly_4.9.2 xml2_1.2.2 vipor_0.4.5 dqrng_0.2.1 multtest_2.42.0 XVector_0.26.0 rvest_0.3.5 bibtex_0.4.2.2 digest_0.6.25 sctransform_0.2.1 RcppAnnoy_0.0.15 tsne_0.1-3 rmarkdown_2.1 cellranger_1.1.0 leiden_0.3.3 uwot_0.1.5 edgeR_3.28.1 DelayedMatrixStats_1.8.0 shiny_1.4.0 gtools_3.8.1 lifecycle_0.1.0 nlme_3.1-145 jsonlite_1.6.1 BiocNeighbors_1.4.2 fansi_0.4.1 limma_3.42.2 pillar_1.4.3 lattice_0.20-40 fastmap_1.0.1 httr_1.4.1 plotrix_3.7-7 survival_3.1-8 glue_1.3.1 FNN_1.1.3 png_0.1-7
## [142] bit_1.1-15.2 stringi_1.4.6 blob_1.2.1 BiocSingular_1.2.2 caTools_1.18.0 memoise_1.1.0 irlba_2.3.3 future.apply_1.4.0 ape_5.3